# devtools::install_local("geneRefineR/", force = T)
# library("geneRefineR") 
source("geneRefineR/R/functions.R") 

library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt) 
# Ensembl LD API
library(httr)
library(jsonlite)
library(xml2)
library(gaston)
library(RCurl)
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")

sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
## 
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] susieR_0.6.2.0411  tidyr_0.8.1        RCurl_1.95-4.11   
##  [4] bitops_1.0-6       gaston_1.5.4       RcppParallel_4.4.2
##  [7] Rcpp_1.0.0         xml2_1.2.0         jsonlite_1.5      
## [10] httr_1.3.1         biomaRt_2.38.0     curl_3.2          
## [13] ggrepel_0.8.0      cowplot_0.9.3      plotly_4.7.1      
## [16] ggplot2_3.1.0      dplyr_0.7.6        data.table_1.11.8 
## [19] DT_0.4             readxl_1.1.0      
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-35      prettyunits_1.0.2    assertthat_0.2.0    
##  [4] rprojroot_1.3-2      digest_0.6.18        R6_2.4.0            
##  [7] cellranger_1.1.0     plyr_1.8.4           backports_1.1.2     
## [10] stats4_3.5.1         RSQLite_2.1.1        evaluate_0.11       
## [13] pillar_1.3.1         rlang_0.3.1          progress_1.2.0      
## [16] lazyeval_0.2.1       blob_1.1.1           S4Vectors_0.20.1    
## [19] Matrix_1.2-14        rmarkdown_1.10       stringr_1.4.0       
## [22] htmlwidgets_1.2      bit_1.1-14           munsell_0.5.0       
## [25] compiler_3.5.1       pkgconfig_2.0.2      BiocGenerics_0.28.0 
## [28] htmltools_0.3.6      tidyselect_0.2.4     expm_0.999-2        
## [31] tibble_2.0.1         IRanges_2.16.0       XML_3.98-1.12       
## [34] viridisLite_0.3.0    crayon_1.3.4         withr_2.1.2         
## [37] grid_3.5.1           gtable_0.2.0         DBI_1.0.0           
## [40] magrittr_1.5         scales_1.0.0         stringi_1.3.1       
## [43] bindrcpp_0.2.2       tools_3.5.1          bit64_0.9-7         
## [46] Biobase_2.42.0       glue_1.3.0           purrr_0.2.5         
## [49] hms_0.4.2            parallel_3.5.1       yaml_2.1.19         
## [52] AnnotationDbi_1.44.0 colorspace_1.3-2     memoise_1.1.0       
## [55] knitr_1.20           bindr_0.1.1
print(paste("susieR ", packageVersion("susieR")))  
## [1] "susieR  0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
root <- "../../.."
Data_dirs = list(
  # ++++++++ GWAS SUMMARY STATS ++++++++ # 
  # Nall et al. (2019) w/ 23andMe Parkinson's GWAS 
  "Nalls_23andMe" = list(topSS="Data/Parkinsons/Nalls_23andMe/Nalls2019_S2_SummaryStats.xlsx",
                   # fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/23andme/PD_all_post30APRIL2015_5.2_extended.txt")),
                   fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/combined_meta/nallsEtAl2019_allSamples_allVariants.mod.txt")),  
  ## IGAP Alzheimer's GWAS
  "IGAP" = list(topSS="Data/Alzheimers/IGAP/Lambert_2019_AD_GWAS.xlsx",
                fullSS=file.path(root,"ad-omics/data/AD_GWAS/latest/IGAP_stage_1.1000G.phase3.20130502.tsv")),
  ## Marioni et al. (2018) Alzheimer's GWAS
  "Marioni_2018" = list(topSS=NA,
                        fullSS=file.path(root,"ad-omics/data/AD_GWAS/latest/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv")),
  ## Posthuma et al. (2018) Alzheimer's GWAS 
  "Posthuma_2018" = list(topSS="Data/Alzheimers/Posthuma/AD_target_SNP.xlsx", 
                fullSS=file.path(root,"ad-omics/data/AD_GWAS/latest/IGAP_stage_1.1000G.phase3.20130502.tsv")),

  # ++++++++ eQTL SUMMARY STATS ++++++++ #
  ## MESA eQTLs: African Americans
  "MESA_AFA" = list(topSS="Data/eQTL/MESA/AFA_eQTL_PTK2B.txt",
                   fullSS=file.path(root,"ad-omics/data/mesa/AFA_cis_eqtl_summary_statistics.txt")),
  ## MESA eQTLs: Caucasians
  "MESA_CAU" = list(topSS="Data/eQTL/MESA/CAU_eQTL_PTK2B.txt",
                   fullSS=file.path(root,"ad-omics/data/mesa/CAU_cis_eqtl_summary_statistics.txt.gz")),
  ## MESA eQTLs: Hispanics
  "MESA_HIS" = list(topSS="Data/eQTL/MESA/HIS_eQTL_PTK2B.txt",
                   fullSS=file.path(root,"ad-omics/data/mesa/HIS_cis_eqtl_summary_statistics.txt.gz")) 
  )
 

# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
  vcf_folder = F # Use internet if I'm working from my laptop
  Data_dirs$IGAP$fullSS <- "Data/Alzheimers/IGAP/IGAP_stage_1.1000G.phase3.20130502.tsv"
  Data_dirs$Marioni_2018$fullSS <- "Data/Alzheimers/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv"
  Data_dirs$Posthuma_2018$fullSS <- "Data/Alzheimers/Posthuma/phase3.beta.se.hrc.txt"
  Data_dirs$MESA_CAU$fullSS <- "Data/eQTL/MESA/CAU_cis_eqtl_summary_statistics.txt" 
} else{
  vcf_folder = "../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
}  

Parkinson’s Disease

Nalls et. al. (2019) w/ 23andMe

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Nalls_23andMe$topSS,
  sheet="Data",
  chrom_col = "CHR", position_col = "BP", snp_col="SNP",
  pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
  caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")


finemapped_PD_Nalls_23andMe <- finemap_geneList(top_SNPs, geneList=c("LRRK2"),#c("LRRK2","GBAP1","SNCA","VPS13C","GCH1"),
                 file_path=Data_dirs$Nalls_23andMe$fullSS,
                 chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
                 pval_col = "p", effect_col = "beta", stderr_col = "se", 
                 vcf_folder = vcf_folder)

LRRK2

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing ../../../pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4819.31771667441” [1] “objective:-4817.53694272212” [1] “objective:-4817.5346354578” [1] “objective:-4817.53463247054” [1] “objective:-4817.5346324668” [1] “objective:-4817.53463246679”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

Alzheimer’s Disease

Posthuma (2018)

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$Posthuma_2018$topSS,
  sheet = 3,
  chrom_col = "Chr", position_col = "bp", snp_col="SNP",
  pval_col="P", effect_col="Z", gene_col="Gene",
  caption= "Posthuma et al. (2018) AD GWAS Summary Stats")


finemapped_AD_Posthuma <- finemap_geneList(top_SNPs, geneList=c("CLU/PTK2B","APOE"),
                 file_path=Data_dirs$Posthuma_2018$fullSS,
                 effect_col = "BETA", stderr_col = "SE", position_col = "BP", 
                 vcf_folder = vcf_folder)

IGAP

top_SNPs <- import_topSNPs(
  file_path = Data_dirs$IGAP$topSS,
  sheet = 1,
  chrom_col = "CHR", position_col = "Position", snp_col="SNP",
  pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
  caption= "IGAP: AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )

finemapped_AD_IGAP <- finemap_geneList(top_SNPs, geneList=c("CLU"), 
                   file_path=Data_dirs$IGAP$fullSS,
                   chrom_col = "CHROM",  pval_col = "PVAL", snp_col = "SNP",
                   effect_col = "BETA", stderr_col = "SE", position_col = "POS", 
                   vcf_folder = vcf_folder, num_causal = 1)

CLU

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing ../../../ad-omics/data/AD_GWAS/latest/CLU_subset.txt …

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-4279.8600749241” [1] “objective:-4279.45807028138” [1] “objective:-4279.457850295” [1] “objective:-4279.45785017425”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

eQTL

AFR = AFA = African AMR = Ad Mixed American EAS = East Asian EUR = CAU = European SAS = South Asian

MESA - AFA

population <- "AFA"
finemapped_eQTL_MESA.AFA <- finemap_eQTL(population, gene =  "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_AFA$fullSS)

Extracting relevant variants from fullSS… Extraction completed in 2.64 seconds

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-13479.2077298861” [1] “objective:-13479.1606677746” [1] “objective:-13479.1606580607” [1] “objective:-13479.1606580587”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - CAU

population <- "CAU"
finemapped_eQTL_MESA.CAU <- finemap_eQTL(population, gene = "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_CAU$fullSS)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_subset.txt …

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-13479.2077287315” [1] “objective:-13479.1606640544” [1] “objective:-13479.1606572313” [1] “objective:-13479.1606572293”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

MESA - HIS

population <- "HIS"
finemapped_eQTL_MESA.HIS <- finemap_eQTL(population, gene = "PTK2B", 
                                         fullSS_path = Data_dirs$MESA_HIS$fullSS)

Subset file already exists. Importing Data/eQTL/MESA/PTK2B_subset.txt …

PTK2B

  • Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_subset.txt … Calculating Standard Error…

  • Creating LD matrix… LD Reference Panel = 1KG_Phase1 ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.

  • Fine mapping with SusieR… [1] “objective:-13479.2077267564” [1] “objective:-13479.160663846” [1] “objective:-13479.1606552371” [1] “objective:-13479.1606552352”

Credible Set:

## Warning: Removed 1 rows containing missing values (geom_hline).

Merge Results

# finemapped_eQTL_MESA.AFA <- finemapped_eQTL_MESA.HIS<- finemapped_eQTL_MESA.CAU
named_results_list <- list("Nalls_23andMe"=finemapped_PD_Nalls_23andMe,
                           "IGAP"=finemapped_AD_IGAP,
                           "eQTL_MESA.AFA"=finemapped_eQTL_MESA.AFA, 
                           "eQTL_MESA.CAU"=finemapped_eQTL_MESA.CAU, 
                           "eQTL_MESA.HIS"=finemapped_eQTL_MESA.HIS)
merged_results <- merge_finemapping_results(named_results_list, csv_path ="merged_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")
table(as.character(merged_results$SNP))
## 
##  rs11174697  rs11564199  rs11787077 rs140722239   rs1532276   rs1532278 
##           1           1           1           1           1           1 
##   rs2070926   rs2279590  rs28370664  rs28370830  rs28834970   rs4236673 
##           1           1           1           1           3           1 
##  rs73223431      rs7982   rs9331896 
##           3           1           1